
###############################################
###   Calculate CES registration rates      ###
###############################################

library(pacman)
p_load(dataverse, dplyr, ggplot2, haven, survey, estimatr)


# Download the 2020 CES common-content file from the Harvard Dataverse and
# parse it with read.csv. The server is set twice on purpose: once as an
# environment variable and once on the call itself.
Sys.setenv(DATAVERSE_SERVER = "dataverse.harvard.edu")
dat <- get_dataframe_by_name(
  filename = "CES20_Common_OUTPUT_vv.csv",
  dataset = "10.7910/DVN/E9N6PH",
  .f = read.csv,
  original = TRUE,
  server = "dataverse.harvard.edu"
)

# Recode registration and voting variables as 0/1 indicators.
#   reg / voted               : built from the CL_* columns
#                               (presumably the validated voter-file match --
#                               TODO confirm against the CES codebook)
#   reg_report / voted_report : built from the survey items votereg_post and
#                               CC20_401 (self-reports)
# Every value other than the listed codes -- including NA -- is coded 0, so
# these indicators never contain missing values.
dat <- dat %>%
  mutate(
    reg = case_when(CL_voter_status == 1 ~ 1,
                    TRUE ~ 0),
    reg_report = case_when(votereg_post == 1 ~ 1,
                           TRUE ~ 0),
    voted = case_when(CL_2020gvm %in% c(1, 2, 3, 4, 5) ~ 1,
                      TRUE ~ 0),
    voted_report = case_when(CC20_401 == 5 ~ 1,
                             TRUE ~ 0)
  )

# Create targets for reweighting: the weighted mean of every model-matrix
# column implied by `target_formula`, computed on `target_design`.
#
# Arguments:
#   target_design  - object with `model.frame()` and `weights()` methods
#                    (e.g. a survey design); weights must be one per row.
#   target_formula - one-sided formula naming the calibration variables.
#
# Returns a named numeric vector with one entry per model-matrix column
# (intercept first); the intercept entry is always 1.
create_targets <- function(target_design, target_formula) {
  target_mf <- model.frame(target_formula, model.frame(target_design))
  target_mm <- model.matrix(target_formula, target_mf)
  wts <- weights(target_design)

  # model.frame() drops rows with missing values (default na.action) and
  # records their positions. Drop the same rows from the weights; otherwise
  # the full-length weight vector is silently recycled against the shorter
  # matrix and the denominator still counts the dropped rows.
  dropped <- attr(target_mf, "na.action")
  if (!is.null(dropped)) {
    wts <- wts[-as.integer(dropped)]
  }
  stopifnot(length(wts) == nrow(target_mm))

  # A vector multiplies a matrix down its columns, so row i is scaled by
  # wts[i] before the column sums are taken.
  colSums(target_mm * wts) / sum(wts) # returns vector of targets
}

# Recode demographic variables into the categories used for reweighting.
# NOTE: `race` and `educ` are overwritten in place, so this step must run
# exactly once on freshly loaded data (a second run would map every race to
# "other" and every educ to NA).
# Codes not listed for sex, winc and educ (e.g. faminc_new above 9) become
# NA, and those rows are removed by the complete-case filter further down.
# NOTE(review): confirm that dropping the higher faminc_new codes is intended.
dat <- dat %>%
  mutate(race = case_when(race == 1 ~ "white",
                          race == 2 ~ "black",
                          race == 3 ~ "hispan",
                          TRUE ~ "other")) %>%
  mutate(sex = case_when(gender == 1 ~ "M",
                         gender == 2 ~ "F")) %>%
  mutate(winc = case_when(faminc_new == 1 ~ "first",
                          faminc_new == 2 ~ "second",
                          faminc_new == 3 ~ "third",
                          faminc_new == 4 ~ "30-40",
                          faminc_new %in% c(5, 6) ~ "40-60",
                          faminc_new %in% c(7, 8, 9) ~ "60+")) %>%
  mutate(educ = case_when(educ == 1 ~ "no hs",
                          educ %in% c(2, 3) ~ "hs",
                          educ == 4 ~ "aa",
                          educ %in% c(5:6) ~ "ba+"))

# Assign birth-cohort groups. The bins are contiguous, so every non-missing
# birthyr falls in exactly one group. The last bin uses `>= 1990`: with the
# previous `> 1990`, respondents born in 1990 matched no bin, became NA and
# were silently dropped by the filter below.
dat <- dat %>%
  mutate(age_group = case_when(birthyr < 1977 ~ "adult",
                               birthyr >= 1977 & birthyr < 1990 ~ "old kid",
                               birthyr >= 1990 ~ "young kid"))
# restrict to observations without missing demographics
dat <- dat %>%
  filter(
    !is.na(winc),
    !is.na(sex),
    !is.na(race),
    !is.na(educ),
    !is.na(age_group)
  )

# create survey design object for ces data
# (no clusters and no weights supplied, so svydesign() assumes equal
# probability and warns about it)
dat_des <- svydesign(ids = ~1, data = dat)
# Unweighted CES means of the calibration columns; only the column names are
# used below. NOTE: this object is called `names` and masks base::names as a
# variable; function calls such as names(x) still resolve to the base function.
names <- create_targets(dat_des, ~race + sex + winc + educ + age_group)

# input weighting targets to match MTO on race, sex, income, education, age group
# The values are positional: they must be listed in the same order as the
# model-matrix columns in `names` (intercept first). Print `names` to check
# the order whenever a recode changes.
tvec <- c(1, .06, .26, .03, .37, .085, .035, .25, .25, .25, .06, .19, .65, .45, .25)
# Fail loudly if the recodes changed the number of model-matrix columns,
# rather than attaching names to the wrong targets.
stopifnot(length(tvec) == length(names))
names(tvec) <- names(names)

# Rake the CES design to the target vector, then store the calibrated
# weights on `dat`, rescaled so that they average to 1.
w1 <- calibrate(
  design = dat_des,
  formula = ~race + sex + winc + educ + age_group,
  population = tvec,
  calfun = "raking",
  force = TRUE
)
dat[["wt_mto_age"]] <- weights(w1) / mean(weights(w1))
# Visual check of the weight distribution
hist(dat$wt_mto_age, breaks = 100)

# check participation rates by education
# (built from the CL_* voter-status and turnout columns)
dat %>%
  group_by(educ) %>%
  summarize(reg = weighted.mean(reg, w = wt_mto_age, na.rm = TRUE),
            voted = weighted.mean(voted, w = wt_mto_age, na.rm = TRUE))

# same breakdown using the self-reported measures; the output columns are
# labelled *_report so the two tables cannot be confused
dat %>%
  group_by(educ) %>%
  summarize(reg_report = weighted.mean(reg_report, w = wt_mto_age, na.rm = TRUE),
            voted_report = weighted.mean(voted_report, w = wt_mto_age, na.rm = TRUE))

# check mean registration rates (reported and measured)
weighted.mean(dat$reg, dat$wt_mto_age, na.rm = TRUE)
weighted.mean(dat$reg_report, dat$wt_mto_age, na.rm = TRUE)

# check registration rates by age group (measured and reported)
dat %>%
  group_by(age_group) %>%
  summarize(reg = weighted.mean(reg, w = wt_mto_age, na.rm = TRUE),
            reg_report = weighted.mean(reg_report, w = wt_mto_age, na.rm = TRUE))
